pacman::p_load(tidyverse,openxlsx,reshape2,xtable)

# NOTE(review): the empty path is a placeholder and must be filled in before
# running; the relative "OH/..." input paths and the .tex/.csv outputs written
# further down are all resolved against this directory.
setwd("")#set working directory

# Summarise how many rows stand behind a pairwise-complete correlation matrix.
#
# Arguments:
#   df      - data frame; only its numeric columns are considered.
#   verbose - if TRUE (default) print the complete-case count of every column
#             pair, in (1,2), (1,3), ..., (2,3), ... order.
#
# Returns a list with
#   max.complete / min.complete - largest / smallest number of rows that are
#                                 non-NA in both columns, over all column pairs
#                                 (NA when there are fewer than two numeric
#                                 columns);
#   min.obs / max.obs           - smallest / largest number of non-NA values
#                                 in any single numeric column (NA when there
#                                 are no numeric columns).
precinct.num <- function(df, verbose = TRUE) {
  # Base subsetting instead of the deprecated bare-predicate select(is.numeric).
  frac <- as.data.frame(df)[vapply(df, is.numeric, logical(1))]
  n_cols <- ncol(frac)

  # Non-NA count per column.
  obs_counts <- vapply(frac, function(col) sum(!is.na(col)), numeric(1))

  # Jointly complete rows per column pair. Guarding on n_cols avoids the
  # backwards 1:0 sequence that would otherwise index a non-existent column.
  pair_counts <- integer(0)
  if (n_cols >= 2) {
    pairs <- utils::combn(n_cols, 2)
    pair_counts <- vapply(
      seq_len(ncol(pairs)),
      function(k) {
        sum(stats::complete.cases(frac[[pairs[1, k]]], frac[[pairs[2, k]]]))
      },
      integer(1)
    )
    if (verbose) {
      for (n_pair in pair_counts) {
        print(n_pair)
      }
    }
  }

  has_pairs <- length(pair_counts) > 0
  has_obs <- length(obs_counts) > 0
  list(max.complete = if (has_pairs) max(pair_counts) else NA_integer_,
       min.complete = if (has_pairs) min(pair_counts) else NA_integer_,
       min.obs = if (has_obs) min(obs_counts) else NA_real_,
       max.obs = if (has_obs) max(obs_counts) else NA_real_)
}#extract number of 1) complete observations across candidates and 2) number of non-NA values for each candidate

#Import ----
# Raw precinct-level returns, read relative to the working directory.
# NOTE(review): the dropped leading rows (first two of the .xlsx files, first
# two of the 2024 .csv files) and the hard-coded row positions 9650 / 8880 are
# presumably non-precinct rows (headers/totals) - confirm against the files.
oh.dem.pres.2020 <- read.xlsx("OH/OH_DemPrimaries_2020.xlsx")[-(1:2), ]
oh.dem.pres.2016 <- read.xlsx("OH/OH_DemPrimaries_2016.xlsx")[-(1:2), ]
oh.dem.sen.2010 <- dplyr::select(
  read.xlsx("OH/OH_DemPrimaries_2010.xlsx"),
  COUNTY.NUMBER, STATE.PRC.CODE, PRECINCT.NAME,
  `U.S..Senate.(D).-.Jennifer.Brunner`, `U.S..Senate.(D).-.Lee.Fisher`
)
oh.gop.sen.2018 <- read.xlsx("OH/OH_GOPPrimaries_2018.xlsx")[-(1:2), ]
oh.gop.pres.2016 <- read.xlsx("OH/OH_GOPPrimaries_2016.xlsx")[-(1:2), ]
# 2012: keep identifiers plus the six presidential candidates, then drop row 9650.
oh.gop.pres.2012 <- dplyr::select(
  read.xlsx("OH/OH_GOPPrimaries_2012.xlsx"),
  COUNTY_NAME, PRECINCT_CODE, PRECINCT_NAME,
  `President.(R).-.Gingrich,.Newt`, `President.(R).-.Huntsman,.Jon`,
  `President.(R).-.Paul,.Ron`, `President.(R).-.Perry,.Rick`,
  `President.(R).-.Romney,.Mitt`, `President.(R).-.Santorum,.Rick`
)[-9650, ]
oh.gop.pres.2024 <- read.csv("OH/OH_GOPPresidentialPrimaries_2024.csv")[3:8880, ]
oh.gop.sen.2024 <- read.csv("OH/OH_GOPSenatePrimaries_2024.csv")[3:8880, ]

# Democrats ----
# 2020 - Biden, Sanders only to get more than 10%
# Identifier/turnout columns are moved to the front so that every column to
# the right of Total.Voters is summed into the precinct total.
oh.dem.pres.2020.sub <- dplyr::select(
  oh.dem.pres.2020,
  Precinct.Code, County.Name, Precinct.Name, Media.Market, Registered.Voters,
  Turnout.Percentage, Region.Name, Total.Voters,
  `Joseph.R..Biden.Jr.(D)`, `Bernie.Sanders.(D)`, everything()
)

oh.dem.pres.2020.sub[["total_vote"]] <- rowSums(
  oh.dem.pres.2020.sub[, seq(match("Total.Voters", names(oh.dem.pres.2020.sub)) + 1,
                             ncol(oh.dem.pres.2020.sub))]
)
# Candidate shares of the precinct total.
oh.dem.pres.2020.sub[["biden.2020.pct"]] <- with(
  oh.dem.pres.2020.sub, `Joseph.R..Biden.Jr.(D)` / total_vote
)
oh.dem.pres.2020.sub[["sanders.2020.pct"]] <- with(
  oh.dem.pres.2020.sub, `Bernie.Sanders.(D)` / total_vote
)

# Prints any precinct-code + county key that occurs more than once.
dplyr::filter(
  questionr::freq(with(oh.dem.pres.2020.sub, paste0(Precinct.Code, County.Name))),
  n > 1
)#county.name + precinct.code is a unique identifier

oh.dem.pres.2020.sub <- dplyr::select(
  oh.dem.pres.2020.sub,
  Precinct.Code, Precinct.Name, County.Name, biden.2020.pct, sanders.2020.pct
)

# 2016 - Clinton, Sanders only to get more than 10%
# Identifier columns first; everything to the right of Total.Voters is summed
# into the precinct total below.
oh.dem.pres.2016.sub <- dplyr::select(
  oh.dem.pres.2016,
  Precinct.Code, County.Name, Precinct.Name, Media.Market, Registered.Voters,
  Region.Name, Total.Voters, everything()
)

# Prints any precinct-code + county key that occurs more than once.
dplyr::filter(
  questionr::freq(with(oh.dem.pres.2016.sub, paste0(Precinct.Code, County.Name))),
  n > 1
)#county.name + precinct.code is a unique identifier

oh.dem.pres.2016.sub[["total_vote"]] <- rowSums(
  oh.dem.pres.2016.sub[, seq(match("Total.Voters", names(oh.dem.pres.2016.sub)) + 1,
                             ncol(oh.dem.pres.2016.sub))]
)
oh.dem.pres.2016.sub[["clinton.2016.pct"]] <- with(
  oh.dem.pres.2016.sub, `Hillary.Clinton.(D)` / total_vote
)
oh.dem.pres.2016.sub[["sanders.2016.pct"]] <- with(
  oh.dem.pres.2016.sub, `Bernie.Sanders.(D)` / total_vote
)

oh.dem.pres.2016.sub <- dplyr::select(
  oh.dem.pres.2016.sub,
  Precinct.Code, Precinct.Name, County.Name, clinton.2016.pct, sanders.2016.pct
)

# Match 2016 and 2020 precincts on code + county and keep only the shares.
oh.dem.2010.2016.2020 <- dplyr::select(
  full_join(oh.dem.pres.2016.sub, oh.dem.pres.2020.sub,
            by = c("Precinct.Code", "County.Name")),
  clinton.2016.pct, sanders.2016.pct, biden.2020.pct, sanders.2020.pct
)

# Pairwise-complete correlations; the first two columns (the 2016 candidates)
# are dropped so the table shows every candidate against the 2020 candidates.
cormatr <- cor(oh.dem.2010.2016.2020, use = "pairwise.complete.obs")
cormatr2 <- cormatr[, -(1:2)]
print(xtable(cormatr2, digits = c(0, rep(3, 2))))

precinct.num(oh.dem.2010.2016.2020)
print(
  xtable(
    cormatr2,
    caption = "Correlations of vote shares in Democratic primaries from Ohio precincts (n=8802-8929).",
    label = "t:oh:agg",
    digits = c(0, 3, 3)
  ),
  file = "tableA25.tex"
)#table A25

# Replace the variable names in the written table with display labels.
# Matching is literal (fixed = TRUE): the names contain ".", which as a regex
# would match any character.
tableA25.labels <- c(
  "clinton.2016.pct" = "Clinton '16",
  "sanders.2016.pct" = "Sanders '16",
  "biden.2020.pct" = "Biden '20",
  "sanders.2020.pct" = "Sanders '20"
)
tableA25 <- Reduce(
  function(lines, key) gsub(key, tableA25.labels[[key]], lines, fixed = TRUE),
  names(tableA25.labels),
  readLines("tableA25.tex")
)
writeLines(tableA25, "tableA25.tex")

#prepare corrs table for regression analysis
# Long format: one row per (row candidate, column candidate) correlation.
# The data frame is built directly from the numeric matrix; going through
# cbind() with the row names would coerce every correlation to character.
cormatd3 <- data.frame(variable_2 = rownames(cormatr2), cormatr2,
                       stringsAsFactors = FALSE)
cormatd4 <- reshape2::melt(cormatd3, id.vars = "variable_2")

#extract years as a separate column
# Helper: election year embedded in a "<candidate>.<yyyy>.pct" name, 0 when
# the name carries no such year (same sentinel the script used before).
.year_from_varname <- function(x) {
  x <- as.character(x)
  yr <- rep(0, length(x))
  hit <- grepl("\\.[0-9]{4}\\.pct$", x)
  yr[hit] <- as.numeric(sub("^.*\\.([0-9]{4})\\.pct$", "\\1", x[hit]))
  yr
}
cormatd4$year_cand1 <- .year_from_varname(cormatd4$variable_2)
cormatd4$year_cand2 <- .year_from_varname(cormatd4$variable)
cormatd5 <- cormatd4[, c("variable_2", "year_cand1", "variable", "year_cand2", "value")]

cormatd5$value <- as.numeric(cormatd5$value)
cormatd5$variable <- as.character(cormatd5$variable)
cormatd5$party <- "DEMS"
cormatd5$state <- 'OH'
write.csv(cormatd5, "oh_dems_groupbyyear_corrs.csv")

# Mean and median correlation per (party, year of candidate 1, year of
# candidate 2), using only pairs from different years and different variables.
dems.grouped <- cormatd5 %>%
  mutate(
    year_cand1 = as.character(year_cand1),
    year_cand2 = as.character(year_cand2)
  ) %>%
  filter(year_cand1 != year_cand2, variable_2 != variable) %>%
  group_by(party, year_cand1, year_cand2) %>%
  dplyr::summarise(
    cor_mean = mean(value, na.rm = TRUE),
    cor_median = median(value, na.rm = TRUE)
  )

# Republicans ----

# 2024 - Moreno, Dolan, Larose get more than 10% of the vote
# Precinct total = sum of every column to the right of Region.Name.
oh.gop.sen.2024[["total_vote"]] <- rowSums(
  oh.gop.sen.2024[, seq(match("Region.Name", names(oh.gop.sen.2024)) + 1,
                        ncol(oh.gop.sen.2024))]
)
oh.gop.sen.2024[["dolan.2024.pct"]] <- with(oh.gop.sen.2024, `Matt.Dolan..R.` / total_vote)
oh.gop.sen.2024[["larose.2024.pct"]] <- with(oh.gop.sen.2024, `Frank.LaRose..R.` / total_vote)
oh.gop.sen.2024[["moreno.2024.pct"]] <- with(oh.gop.sen.2024, `Bernie.Moreno..R.` / total_vote)

oh.gop.sen.2024.sub <- dplyr::select(
  oh.gop.sen.2024,
  Precinct.Code, County.Name, Precinct.Name,
  dolan.2024.pct, larose.2024.pct, moreno.2024.pct
)

# Prints any precinct-code + county key that occurs more than once.
dplyr::filter(
  questionr::freq(with(oh.gop.sen.2024, paste0(Precinct.Code, County.Name))),
  n > 1
)#county.name + precinct.code is a unique identifier

# 2024 - Trump, Haley get more than 10% of the vote
colnames(oh.gop.pres.2024)
# County-level totals for a manual comparison. All three sums now ignore NAs;
# previously the combined `sum` column did not, so one NA precinct would have
# turned the county total into NA while trump/haley still showed numbers.
oh.gop.pres.2024%>%
  group_by(County.Name)%>%
  dplyr::summarize(trump=sum(Donald.J..Trump..R.,na.rm=TRUE),
                   haley=sum(Nikki.R..Haley..R.,na.rm=TRUE),
                   sum=sum(Nikki.R..Haley..R., Ron.DeSantis..R., Donald.J..Trump..R., Chris..Christie..R.,
                           na.rm=TRUE))%>%
  ungroup()%>%
  filter(County.Name=="Cuyahoga")#sanity check on Cuyahoga—appears correct. It seems to be the case that Ballots.Counted combines the different primaries, not using it. 

# Precinct total = sum of every column to the right of Official.Voter.Turnout.
oh.gop.pres.2024$total_vote <- rowSums(oh.gop.pres.2024[,(which(colnames(oh.gop.pres.2024)=="Official.Voter.Turnout")+1):ncol(oh.gop.pres.2024)])
oh.gop.pres.2024$trump.2024.pct <- oh.gop.pres.2024$`Donald.J..Trump..R.` / oh.gop.pres.2024$total_vote
oh.gop.pres.2024$haley.2024.pct <- oh.gop.pres.2024$`Nikki.R..Haley..R.` / oh.gop.pres.2024$total_vote

# Prints any precinct-code + county key that occurs more than once.
questionr::freq(paste0(oh.gop.pres.2024$Precinct.Code, oh.gop.pres.2024$County.Name))%>%
  filter(n>1)#county.name + precinct.code is a unique identifier

oh.gop.pres.2024.sub <- oh.gop.pres.2024 %>%
  dplyr::select(Precinct.Code, County.Name, Precinct.Name, trump.2024.pct, haley.2024.pct)

# 2018 - Renacci, Gibbons, Ackison only to get more than 10%
# Precinct total = sum of every column to the right of Turnout.Percentage.
oh.gop.sen.2018[["total_vote"]] <- rowSums(
  oh.gop.sen.2018[, seq(match("Turnout.Percentage", names(oh.gop.sen.2018)) + 1,
                        ncol(oh.gop.sen.2018))]
)
oh.gop.sen.2018[["ackison.2018.pct"]] <- with(oh.gop.sen.2018, `Melissa.Ackison.(R)` / total_vote)
oh.gop.sen.2018[["gibbons.2018.pct"]] <- with(oh.gop.sen.2018, `Mike.Gibbons.(R)` / total_vote)
oh.gop.sen.2018[["renacci.2018.pct"]] <- with(oh.gop.sen.2018, `Jim.Renacci.(R)` / total_vote)

# Prints any precinct-code + county key that occurs more than once.
dplyr::filter(
  questionr::freq(with(oh.gop.sen.2018, paste0(Precinct.Code, County.Name))),
  n > 1
)#county.name + precinct.code is a unique identifier

oh.gop.sen.2018.sub <- dplyr::select(
  oh.gop.sen.2018,
  Precinct.Code, County.Name, Precinct.Name,
  ackison.2018.pct, gibbons.2018.pct, renacci.2018.pct
)

# 2016 - Kasich, Trump, Cruz only to get more than 10%
# Precinct total = sum of every column to the right of Total.Voters.
oh.gop.pres.2016[["total_vote"]] <- rowSums(
  oh.gop.pres.2016[, seq(match("Total.Voters", names(oh.gop.pres.2016)) + 1,
                         ncol(oh.gop.pres.2016))]
)
oh.gop.pres.2016[["kasich.2016.pct"]] <- with(oh.gop.pres.2016, `John.R..Kasich.(R)` / total_vote)
oh.gop.pres.2016[["trump.2016.pct"]] <- with(oh.gop.pres.2016, `Donald.J..Trump.(R)` / total_vote)
oh.gop.pres.2016[["cruz.2016.pct"]] <- with(oh.gop.pres.2016, `Ted.Cruz.(R)` / total_vote)

# Prints any precinct-code + county key that occurs more than once.
dplyr::filter(
  questionr::freq(with(oh.gop.pres.2016, paste0(Precinct.Code, County.Name))),
  n > 1
)#county.name + precinct.code is a unique identifier

oh.gop.pres.2016.sub <- dplyr::select(
  oh.gop.pres.2016,
  Precinct.Code, County.Name, Precinct.Name,
  kasich.2016.pct, trump.2016.pct, cruz.2016.pct
)

# 2012 - Romney, Santorum, Gingrich only to get more than 10%
# Precinct total = sum of every column to the right of PRECINCT_NAME (the six
# candidate columns kept at import).
oh.gop.pres.2012$total_vote <- rowSums(oh.gop.pres.2012[,(which(colnames(oh.gop.pres.2012)=="PRECINCT_NAME")+1):ncol(oh.gop.pres.2012)])
oh.gop.pres.2012$gingrich.2012.pct <- oh.gop.pres.2012$`President.(R).-.Gingrich,.Newt` / oh.gop.pres.2012$total_vote
oh.gop.pres.2012$santorum.2012.pct <- oh.gop.pres.2012$`President.(R).-.Santorum,.Rick` / oh.gop.pres.2012$total_vote
oh.gop.pres.2012$romney.2012.pct <- oh.gop.pres.2012$`President.(R).-.Romney,.Mitt` / oh.gop.pres.2012$total_vote

oh.gop.pres.2012.sub <- oh.gop.pres.2012 %>%
  dplyr::select(PRECINCT_CODE, COUNTY_NAME, PRECINCT_NAME, gingrich.2012.pct, santorum.2012.pct, romney.2012.pct)

# Uniqueness check on the join key. The 2012 file names its columns
# PRECINCT_CODE / COUNTY_NAME; the Precinct.Code / County.Name spellings used
# for the other years do not exist on this frame, so referring to them made
# the check run on an empty vector and always pass.
questionr::freq(paste0(oh.gop.pres.2012$PRECINCT_CODE, oh.gop.pres.2012$COUNTY_NAME))%>%
  filter(n>1)#county.name + precinct.code is a unique identifier

#clean names
# Bring the 2012 identifier columns in line with the other years' naming.
oh.gop.pres.2012.sub <- dplyr::rename(
  oh.gop.pres.2012.sub,
  Precinct.Code = PRECINCT_CODE,
  County.Name = COUNTY_NAME,
  Precinct.Name = PRECINCT_NAME
)

# Successive full joins on precinct code + county (2016, then 2012, 2018,
# 2024 Senate, 2024 President), keeping only the vote-share columns.
oh.gop.2012.2016.2018.2024 <- Reduce(
  function(joined, nxt) {
    full_join(joined, nxt, by = c("Precinct.Code", "County.Name"))
  },
  list(oh.gop.pres.2016.sub, oh.gop.pres.2012.sub, oh.gop.sen.2018.sub,
       oh.gop.sen.2024.sub, oh.gop.pres.2024.sub)
) %>%
  dplyr::select(
    gingrich.2012.pct, santorum.2012.pct, romney.2012.pct,
    cruz.2016.pct, trump.2016.pct, kasich.2016.pct,
    ackison.2018.pct, gibbons.2018.pct, renacci.2018.pct,
    haley.2024.pct, trump.2024.pct,
    dolan.2024.pct, larose.2024.pct, moreno.2024.pct
  )

# Pairwise-complete correlations; the first three columns (the 2012
# candidates) are dropped from the table's columns.
cormatr <- cor(oh.gop.2012.2016.2018.2024, use = "pairwise.complete.obs")
cormatr2 <- cormatr[, -(1:3)]

precinct.num(oh.gop.2012.2016.2018.2024)

print(
  xtable(
    cormatr2,
    caption = "Correlations of vote shares in Republican primaries from Ohio precincts (n=8512-9485).",
    label = "t:oh-gop-matrix",
    digits = c(0, rep(3, 11))
  ),
  file = "tableA24.tex",
  scalebox = 0.6
)#table A24

# Replace the variable names in the written table with display labels.
# Matching is literal (fixed = TRUE): the names contain ".", which as a regex
# would match any character.
tableA24.labels <- c(
  "gingrich.2012.pct" = "Gingrich '12",
  "santorum.2012.pct" = "Santorum '12",
  "romney.2012.pct" = "Romney '12",
  "cruz.2016.pct" = "Cruz '16",
  "trump.2016.pct" = "Trump '16",
  "kasich.2016.pct" = "Kasich '16",
  "ackison.2018.pct" = "Ackison '18",
  "gibbons.2018.pct" = "Gibbons '18",
  "renacci.2018.pct" = "Renacci '18",
  "haley.2024.pct" = "Haley '24",
  "trump.2024.pct" = "Trump '24",
  "dolan.2024.pct" = "Dolan '24",
  "larose.2024.pct" = "Larose '24",
  "moreno.2024.pct" = "Moreno '24"
)
tableA24 <- Reduce(
  function(lines, key) gsub(key, tableA24.labels[[key]], lines, fixed = TRUE),
  names(tableA24.labels),
  readLines("tableA24.tex")
)
writeLines(tableA24, "tableA24.tex")

#prepare corrs table for regression analysis
# Long format: one row per (row candidate, column candidate) correlation.
# The data frame is built directly from the numeric matrix; going through
# cbind() with the row names would coerce every correlation to character.
cormatd3 <- data.frame(variable_2 = rownames(cormatr2), cormatr2,
                       stringsAsFactors = FALSE)
cormatd4 <- reshape2::melt(cormatd3, id.vars = "variable_2")

#extract years as a separate column
# Helper: election year embedded in a "<candidate>.<yyyy>.pct" name, 0 when
# the name carries no such year (same sentinel the script used before).
.year_from_varname <- function(x) {
  x <- as.character(x)
  yr <- rep(0, length(x))
  hit <- grepl("\\.[0-9]{4}\\.pct$", x)
  yr[hit] <- as.numeric(sub("^.*\\.([0-9]{4})\\.pct$", "\\1", x[hit]))
  yr
}
cormatd4$year_cand1 <- .year_from_varname(cormatd4$variable_2)
cormatd4$year_cand2 <- .year_from_varname(cormatd4$variable)
cormatd5 <- cormatd4[, c("variable_2", "year_cand1", "variable", "year_cand2", "value")]

cormatd5$value <- as.numeric(cormatd5$value)
cormatd5$variable <- as.character(cormatd5$variable)
cormatd5$party <- "GOP"
cormatd5$state <- 'OH'
write.csv(cormatd5, "oh_gop_groupbyyear_corrs.csv")

# Mean and median correlation per (party, year of candidate 1, year of
# candidate 2). The three 2024 Senate candidates are relabelled "2024*" so
# they form a separate group from the 2024 presidential candidates; pairs
# within the same group, or of a variable with itself, are excluded.
gop.grouped <- cormatd5 %>%
  mutate(
    year_cand1 = ifelse(
      variable_2 %in% c("moreno.2024.pct", "larose.2024.pct", "dolan.2024.pct"),
      "2024*",
      as.character(year_cand1)
    ),
    year_cand2 = ifelse(
      variable %in% c("moreno.2024.pct", "larose.2024.pct", "dolan.2024.pct"),
      "2024*",
      as.character(year_cand2)
    )
  ) %>%
  filter(year_cand1 != year_cand2, variable_2 != variable) %>%
  group_by(party, year_cand1, year_cand2) %>%
  dplyr::summarise(
    cor_mean = mean(value, na.rm = TRUE),
    cor_median = median(value, na.rm = TRUE)
  )

#prepare corr summary table
# Both inputs come out of group_by() + summarise() and are therefore still
# grouped (by party and year_cand1). Stack them with bind_rows() and drop the
# grouping explicitly, so that the recode of Party below is not an attempt to
# modify a grouping column.
corr.summary <- dplyr::bind_rows(dems.grouped, gop.grouped) %>%
  ungroup() %>%
  setNames(c("Party", "Year 1", "Year 2", "Corr. Mean", "Corr. Median")) %>%
  mutate(Party = recode(Party,
                        "DEMS" = "Democrats",
                        "GOP" = "Republicans"))

#prepare voteshares-----
# Helper: statewide vote share of every candidate column of `df`.
#   df     - precinct-level data frame.
#   after  - name of the column immediately left of the first candidate column.
#   year   - election year stored in the result.
#   before - name of the column immediately right of the last candidate
#            column; NULL (default) means the candidates run to the last column.
# Returns a data frame with columns voteshare, cand (candidate column name)
# and year, one row per candidate column.
oh.voteshare.table <- function(df, after, year, before = NULL) {
  cols <- colnames(df)
  first <- which(cols == after) + 1
  last <- if (is.null(before)) ncol(df) else which(cols == before) - 1
  votes <- df[, first:last]
  shares <- colSums(votes) / sum(votes)
  data.frame(voteshare = unname(shares),
             cand = names(shares),
             year = year,
             stringsAsFactors = FALSE)
}

#2020-DEMS-PRES
oh.dem.pres.2020.voteshare <- oh.voteshare.table(
  oh.dem.pres.2020, after = "Turnout.Percentage", year = 2020
)

#2016-DEMS-PRES
oh.dem.pres.2016.voteshare <- oh.voteshare.table(
  oh.dem.pres.2016, after = "Total.Voters", year = 2016
)

#2010-DEMS-SEN, NOTE: Excluding due to apparent issue with the underlying data. 
#oh.dem.sen.2010.voteshare <- oh.voteshare.table(
#  oh.dem.sen.2010, after = "PRECINCT.NAME", year = 2010
#)
#oh.dem.sen.2010.voteshare #NOTE: Issue with the underlying raw data. It seems that the Fisher votes are not coded properly. Excluded from the analysis. 

# For the GOP frames the candidate columns stop just before total_vote, which
# was appended (followed by the share columns) earlier in the script.
#2018-GOP-SEN
oh.gop.sen.2018.voteshare <- oh.voteshare.table(
  oh.gop.sen.2018, after = "Turnout.Percentage", before = "total_vote", year = 2018
)

#2016-GOP-PRES
oh.gop.pres.2016.voteshare <- oh.voteshare.table(
  oh.gop.pres.2016, after = "Total.Voters", before = "total_vote", year = 2016
)

#2012-GOP-PRES
oh.gop.pres.2012.voteshare <- oh.voteshare.table(
  oh.gop.pres.2012, after = "PRECINCT_NAME", before = "total_vote", year = 2012
)

#2024-GOP-PRES
oh.gop.pres.2024.voteshare <- oh.voteshare.table(
  oh.gop.pres.2024, after = "Official.Voter.Turnout", before = "total_vote", year = 2024
)

#2024-GOP-SEN
oh.gop.sen.2024.voteshare <- oh.voteshare.table(
  oh.gop.sen.2024, after = "Region.Name", before = "total_vote", year = 2024
)

oh.voteshares <- rbind(oh.gop.sen.2024.voteshare, oh.gop.pres.2024.voteshare, oh.gop.pres.2012.voteshare, oh.gop.pres.2016.voteshare,
                       oh.gop.sen.2018.voteshare, oh.dem.pres.2016.voteshare, oh.dem.pres.2020.voteshare)
write.csv(oh.voteshares, "oh-voteshares.csv")
